Next | Prev | Up | Top | Contents | Index

Source File

/**************************************************************************
 *                                                                       *
 *                  Copyright (C) 1993, Silicon Graphics, Inc.            *
 *                                                                       *
 *  These coded instructions, statements, and computer programs  contain  *
 *  unpublished  proprietary  information of Silicon Graphics, Inc., and  *
 *  are protected by Federal copyright law.  They  may  not be disclosed  *
 *  to  third  parties  or copied or duplicated in any form, in whole or  *
 *  in part, without the prior written consent of Silicon Graphics, Inc.  *
 *                                                                       *
 **************************************************************************/
/**************************************************************************
| This sample IRIX device driver implements a "ram disk" -- a block of
| kernel memory accessed as if it were a disk.  The driver supports both
| block and character interfaces and is loadable and unloadable.
|
| N   N   OO   TTTTT  EEEEE      It does not make sense to use a ram disk
| NN  N  O  O    T    E          in a system like IRIX that implements
| N N N  O  O    T    EEE   ::   effective virtual memory.  This device
| N  NN  O  O    T    E          driver is useful as an example because
| N   N   OO     T    EEEEE ::   it has no hardware dependencies, and so
|                                can be tried out in any IRIX system.
| However, this driver SHOULD NOT be employed in a production system!
| It WILL NOT give better performance.  It WILL consume kernel memory
| that would be better used for buffers.
|**************************************************************************/

#include <sys/ddi.h>        /* gets also sys/types.h and sys/buf.h */
#include <sys/conf.h>       /* for driver flags D_MP etc */
#include <sys/kmem.h>       /* kmem_alloc and friends */
#include <sys/sema.h>       /* the rd_info_t contains a semaphore */
#include <sys/dvh.h>        /* the rd_info_t contains struct volume_header */
#include "ramdrive.h"       /* declare rd_info_t, etc. */
#include <sys/edt.h>        /* declare edt_t for edtinit() */
#include <sys/errno.h>      /* error codes to return */
#include <sys/cmn_err.h>    /* cmn_err() and related constants */
#include <sys/cred.h>       /* cred_t for prototypes */
#include <sys/dkio.h>       /* DKIOC* constants for ioctl */
#include <sys/param.h>      /* NBPSC bytes per sector */
#include <sys/immu.h>       /* IONBPP bytes per I/O page, btod() */
#include <sys/file.h>       /* FEXCL and other open flags */
#include <sys/open.h>       /* OTYP_CHR, OTYP_BLK */
#include <sys/region.h>     /* for vhandl_t */
#include <sys/mload.h>      /* only for M_VERSION */
/**************************************************************************
| Debug display macros: one each for cmn_err calls with 0, 1, 2, 3, or 4
| variable arguments.
|**************************************************************************/
#ifdef DEBUG
/* DEBUG build: each macro forwards its arguments to cmn_err(CE_DEBUG,...). */
#define DBGMSG0(s) cmn_err(CE_DEBUG,s)
#define DBGMSG1(s,x) cmn_err(CE_DEBUG,s,x)
#define DBGMSG2(s,x,y) cmn_err(CE_DEBUG,s,x,y)
#define DBGMSG3(s,x,y,z) cmn_err(CE_DEBUG,s,x,y,z)
#define DBGMSG4(s,x,y,z,w) cmn_err(CE_DEBUG,s,x,y,z,w)
#else
/*
|| Non-DEBUG build: expand to a no-op *expression* instead of to nothing,
|| so that a use such as "if (e) DBGMSG1(...); else ..." is always a
|| complete statement and never leaves an empty if-body behind.
|| The arguments are not evaluated.
*/
#define DBGMSG0(s) ((void)0)
#define DBGMSG1(s,x) ((void)0)
#define DBGMSG2(s,x,y) ((void)0)
#define DBGMSG3(s,x,y,z) ((void)0)
#define DBGMSG4(s,x,y,z,w) ((void)0)
#endif
/**************************************************************************
| Driver flag: this driver is MP-safe.  Also version flag for mload.
|**************************************************************************/
/* Driver flags word; D_MP (sys/conf.h) declares the driver MP-safe. */
unsigned rd_devflag = D_MP;
/* Version string for mload, taken from M_VERSION in sys/mload.h. */
char *rd_mversion = M_VERSION;
/**************************************************************************
| Array of rd_info_t objects, one per allowed minor device.  We rely on
| the loader to ensure these static globals are zero until initialized!
| Also defined: two convenience macros for frequent expressions.
|**************************************************************************/
/* Base of the per-minor-device info array; allocated by rd_basic(). */
static rd_info_t *rd_array;
/*
|| INFOPTR(dev): address of the rd_info_t selected by the minor number of
|| dev.  The expansion is fully parenthesized so it can be used inside a
|| larger expression (for example INFOPTR(d)->size).
*/
#define INFOPTR(dev) (&rd_array[geteminor(dev)])
/*
|| VALIDIO(prd,off,len): true when the byte range off..off+len-1 lies
|| entirely inside the device described by prd.  Negative offsets and
|| lengths are rejected, and the upper bound is tested by subtraction so
|| that off+len cannot overflow.  The arguments are evaluated more than
|| once; pass plain variables.
*/
#define VALIDIO(prd,off,len) \
    (((off_t)(off) >= 0) && ((off_t)(len) >= 0) && \
     ((off_t)(off) <= (off_t)(prd)->size) && \
     ((off_t)(len) <= ((off_t)(prd)->size - (off_t)(off))))
/**************************************************************************
| rd_basic() is called from rd_edtinit() to allocate the rd_array based
| on the global rd_numdevs, an integer set to ##D in the configuration
| file /var/sysgen/master.d/ramdrive.  Also display the other available
| globals for debugging purposes.
|**************************************************************************/
extern int rd_e_major, rd_numdevs, rd_ctrlrs;
int
rd_basic(void)
{
    register int nbytes;
    /*
    || The array survives from an earlier call: report success at once.
    */
    if (rd_array)
        return 1;
    DBGMSG3("ramdrive basic: ##E=%d, ##D=%d, ##C=%d\n",
        rd_e_major, rd_numdevs, rd_ctrlrs);
    /*
    || One rd_info_t per configured device.  A byte count of zero means
    || there is nothing to allocate; complain and report failure.
    */
    nbytes = rd_numdevs*sizeof(rd_info_t);
    if (0 == nbytes)
    {
        cmn_err(CE_ALERT,"ramdrive: confused");
        return 0;
    }
    rd_array = (rd_info_t *)kmem_zalloc(nbytes,KM_SLEEP);
    /* Returns 1 when rd_array is usable, 0 otherwise. */
    return (0 != rd_array);
}
/**************************************************************************
| rd_init() is included solely to demonstrate that this entry point
| can be called in addition to rd_edtinit() and rd_start().
|**************************************************************************/
int
rd_init(void)
{
    /* No work to do: trace the call (DEBUG builds only) and report 0. */
    const int status = 0;
    DBGMSG0("rd_init entry point called\n");
    return status;
}
/**************************************************************************
| rd_start() is included solely to demonstrate that it, too can be called
| in addition to rd_edtinit() and rd_init().
|**************************************************************************/
int
rd_start(void)
{
    /* No work to do: trace the call (DEBUG builds only) and report 0. */
    const int status = 0;
    DBGMSG0("rd_start entry point called\n");
    return status;
}
/**************************************************************************
| rd_format() is a subroutine of both rd_edtinit() and rd_ioctl() which
| "formats" the ramdrive to zeros with a reasonable volume header.
| The volume header (set in both the info struct and "sector 0") 
| describes standard SGI partitions:
|   10 == the whole "drive"
|    8 == the volume header, only one sector in this case
|    7 == all sectors except the volume header
|    0 == data ("root") same as 7
|    1 == swap contains 0 sectors
| For verisimilitude we arbitrarily say we have 1 track/cylinder
| and 8 sectors/track.  This assumes that nsectors is a multiple of 8,
| which is a good bet when the allocated size is a multiple of IO pages
| and sectors are 512 bytes.
|**************************************************************************/
void
rd_format(register rd_info_t *prd)
{
    register struct volume_header *hdr = &prd->vh;
    register int total = btod(prd->size);   /* device size in sectors */

    /*
    || Start from an all-zero header, then fill in the identification
    || fields and the invented geometry: 8 sectors per track, 1 track
    || per cylinder, hence total/8 cylinders.
    */
    bzero((void *)hdr,sizeof(struct volume_header));
    hdr->vh_magic = VHMAGIC;
    hdr->vh_rootpt = 0;
    hdr->vh_swappt = 1;
    hdr->vh_dp.dp_secbytes = NBPSCTR;
    hdr->vh_dp.dp_secs = 8;
    hdr->vh_dp.dp_trks0 = 1;
    hdr->vh_dp.dp_cyls = total/8;
    hdr->vh_dp.dp_interleave = 1;

    /*
    || Partition 8: the volume header itself, sector 0 only.
    */
    hdr->vh_pt[ 8].pt_type = PTYPE_VOLHDR;
    hdr->vh_pt[ 8].pt_firstlbn = 0;
    hdr->vh_pt[ 8].pt_nblks = 1;
    /*
    || Partition 7: every sector after the header.  Partition 0 is
    || a copy of it, so 7 must be filled in first.
    */
    hdr->vh_pt[ 7].pt_type = PTYPE_RAW;
    hdr->vh_pt[ 7].pt_firstlbn = 1;
    hdr->vh_pt[ 7].pt_nblks = total-1;
    hdr->vh_pt[ 0] = hdr->vh_pt[ 7];
    /*
    || Partition 1: an empty partition that starts at the end of the device.
    */
    hdr->vh_pt[ 1].pt_type = PTYPE_RAW;
    hdr->vh_pt[ 1].pt_firstlbn = total;
    hdr->vh_pt[ 1].pt_nblks = 0;
    /*
    || Partition 10: the whole "drive".
    */
    hdr->vh_pt[10].pt_type = PTYPE_VOLUME;
    hdr->vh_pt[10].pt_firstlbn = 0;
    hdr->vh_pt[10].pt_nblks = total;

    /*
    || Checksum last, once every other field is final.  Then clear the
    || medium and place a copy of the header at its start.
    */
    hdr->vh_csum = -vh_checksum(hdr);
    bzero(prd->base,prd->size);
    bcopy(hdr,prd->base,sizeof(prd->vh));
}
/**************************************************************************
| rd_edtinit() is called whenever the driver is loaded, once for each
| VECTOR that names this driver.  A typical VECTOR line would be:
|     VECTOR module=ramdrive ctrl=2 base=0x00040000
| which says, initialize minor number 2 for a size of 256K.
|**************************************************************************/
int
rd_edtinit(register edt_t *pedt)
{
    register rd_info_t *prd;
    register __psint_t size;
    register int nsectors;
    register int ctlr = pedt->e_ctlr;
    /*
    || If this is the first time, allocate the rd_array of info structures.
    || Exit immediately if that fails.
    */
    if (!rd_basic())
    {
        return ENODEV;
    }
    DBGMSG3("ramdrive edtinit bustype %d adap %d ctlr %d\n",
        pedt->e_bus_type, pedt->e_adap, pedt->e_ctlr);
    DBGMSG3("                 e_space[0] iopaddr %x size %x vaddr %x\n",
        pedt->e_space[0].ios_iopaddr,pedt->e_space[0].ios_size,
        pedt->e_space[0].ios_vaddr);
    /*
    || Diagnose and reject an invalid minor dev# from VECTOR ctlr=
    */
    if (ctlr > rd_numdevs)
    {
        cmn_err(CE_ALERT,"ramdrive: ctlr=%d invalid minor dev#",ctlr);
        return ENODEV;
    }
    /*
    || Address the info structure and diagnose multiple initialization
    */
    prd = INFOPTR(ctlr);
    if (prd->base)
    {
        cmn_err(CE_ALERT,"ramdrive: duplicate VECTOR for ctlr=%d",ctlr);
        return EBUSY;
    }
    /*
    || The desired size of the ramdrive is encoded as the base=# value,
    || which is passed as the ios_vaddr value in the edt_t.
    || Diagnose 0 size (omitted base=). Round the size to a
    || multiple of *memory* (not necessarily I/O) pages.
    */
    size = (__psint_t) pedt->e_space[0].ios_vaddr;
    if ((0 == size)||(-1 == size))
    {
        cmn_err(CE_ALERT,
        "ramdrive: no size (base=) specified for ctlr=%d",ctlr);
        return EINVAL;
    }
    size = (size + (NBPP-1)) & (-NBPP); /* in sys/immu.h */
    /*
    || Allocate the kernel memory. Report an error if not possible.
    */
    prd->size = size;
    prd->base = kmem_alloc(size,KM_SLEEP);
    if (!prd->base)
    {
        cmn_err(CE_ALERT,
        "ramdrive: unable to allocate %x bytes for dev %d",size,ctlr);
        return ENOMEM;
    }
    nsectors = btod(size); /* immu.h bytes to disk sectors */
    DBGMSG3("ramdrive: dev# %d allocated %x = %x sectors\n",
                ctlr,size,nsectors);
    /*
    || Initialize the semaphore.
    */
    initnsema(&prd->queue,1,"ramdrive");
    /*
    || Initialize the "volume."
    */
    rd_format(prd);
    DBGMSG2("                 info at 0x%x  vh at 0x%x \n",
        prd, (__psint_t)(&prd->vh) );
    return 0;
}
/**************************************************************************
| rd_open() is called for each open() of a character device /dev/ramchr<n>,
| and during a mount of a block device /dev/ramblk<n>.  We can distinguish
| between types of open from the otyp.
|**************************************************************************/
int
rd_open(dev_t *pdev, int oflag, int otyp, cred_t *pcred)
{
    register rd_info_t *prd;
    register int error = 0;
    /*
    || Make sure the device being opened was initialized by a VECTOR.
    || Check that rd_array exists and that the minor number falls inside
    || it BEFORE forming the info pointer; otherwise an open of a stray
    || device node would read beyond the array (or through a null
    || pointer when no VECTOR line was ever processed).
    */
    if ((!rd_array)
    ||  ((int)geteminor(*pdev) >= rd_numdevs)
    ||  (!(INFOPTR(*pdev))->base))
    {
        cmn_err(CE_NOTE,"ramdrive: open of uninitialized dev %d",*pdev);
        return ENODEV;
    }
    prd = INFOPTR(*pdev);
    /*
    || Seize the device semaphore so that the open counts can be updated
    || without error on a multiprocessor.  The wait is interruptible
    || (PCATCH): a nonzero return means a signal ended the wait and the
    || semaphore is NOT held, so give up with EINTR rather than update
    || the counts unlocked and then release a semaphore we never took.
    */
    if (psema(&prd->queue,PZERO+1 | PCATCH))
    {
        DBGMSG0("ramdrive: open interrupted waiting for semaphore\n");
        return EINTR;
    }
    /*
    || Implement FEXCL (exclusive) open for a privileged process only.
    || Exclusivity applies to the entire minor device, under both its
    || block and character special devices.
    */
    if (oflag & FEXCL)
    {
        if (drv_priv(pcred)) /* not privileged */
        {
            DBGMSG0("ramdrive: reject FEXCL with EPERM\n");
            error = EPERM;
        }
        else if (prd->copen+prd->bopen+prd->nmmap) /* current use? */
        {
            DBGMSG0("ramdrive: reject FEXCL with EBUSY\n");
            error = EBUSY;
        }
        else
        {
            prd->xopen = oflag; /* note device open exclusively */
        }
    }
    else /* nonexclusive request can be blocked by exclusive open */
    {
        if (prd->xopen)
        {
            DBGMSG0("ramdrive: reject normal open for exclusivity\n");
            error = EBUSY;
        }
    }
    if (!error)
    {
        /*
        || Count the open so we don't unload with open devices.
        || NOTE(review): this is a bit test on otyp, not an equality
        || test.  rd_close() classifies otyp the same way, so the two
        || stay in step; confirm against the OTYP_* values in
        || sys/open.h before changing either one.
        */
        if (otyp & OTYP_CHR)
            ++prd->copen;
        else
            ++prd->bopen;
        DBGMSG4("ramdrive open: flag %x copen %d bopen %d xopen %d\n",
            oflag, prd->copen, prd->bopen, prd->xopen);
    }
    vsema(&prd->queue);
    return error;
}
/**************************************************************************
| rd_close() is not called for each close() but for the final close of a
| given device (character or block).  Clear the respective count of opens
| and note whether exclusivity is being given up.  Since a close() in
| one CPU could happen concurrently with an open() in another CPU, we 
| need to grab the semaphore before updating the rd_info.
| NOTE: the flag passed to close does not contain FEXCL even if it was
| given in the flag passed to open.
|**************************************************************************/
int
rd_close(dev_t dev, int flag, int otyp, cred_t *pcred)
{
    register rd_info_t *prd = INFOPTR(dev);
    register int held;
    /*
    || Try to take the device semaphore.  The wait is interruptible
    || (PCATCH); a nonzero return means a signal ended the wait and the
    || semaphore is not held.  The close must still be recorded, because
    || this entry point is called only for the final close, so carry on
    || but remember not to release a semaphore we never acquired.
    */
    held = (0 == psema(&prd->queue,PZERO+1 | PCATCH));
    /*
    || Clear the open count for this kind of device.  The flag passed
    || to close never carries FEXCL, so it is not examined here.
    */
    if (otyp & OTYP_CHR)
    {
        prd->copen = 0;
    }
    else
    {
        prd->bopen = 0;
    }
    /* if all opens are closed, an exclusive one is closed */
    prd->xopen = 0;
    if (held)
    {
        vsema(&prd->queue);
    }
    DBGMSG4("ramdrive close: flag %x copen %d bopen %d xopen %d\n",
            flag, prd->copen, prd->bopen, prd->xopen);
    return 0;
}
/**************************************************************************
| rd_ioctl() is called for ioctl(2), which can only be used on a character
| device. Disk ioctl command numbers are in sys/dkio.h.
| DIOCREADCAPACITY: supported just for fun.
| DIOCGETVH: supported because /etc/mkfs and other tools use it (which
| explains why you apply mkfs to the character, not the block, device).
| DIOCSETVH: allows a program to change the "volume header" info.
| DIOCFORMAT: clears the device contents to 0, rewrites the vol header.
|
| The DIOC(S|G)ETVH calls use only the info in the per-device structure
| in memory. We make no attempt to keep that info in step with the
| contents of sector 0 of the simulated media.  This is consistent with
| other current IRIX disk drivers.  This has the implications that:
|   - you can change the driver's idea of the disk geometry on the fly,
|     without actually formatting the disk, this is useful for scsi.
|   - if you want to make a permanent change in the volume header, 
|      -- one, that's a bad idea, use dvhtool(1) instead, but
|      -- two, if you insist, you need both a write to sector 0 and
|         a call to ioctl(,DIOCSETVH) to keep the driver up to date.
|
| Neither DIOCSETVH nor DIOCFORMAT hold the semaphore. You are strongly
| advised to do an exclusive open before calling them (but mkfp doesn't).
|**************************************************************************/
int
rd_ioctl(dev_t dev, int cmd, caddr_t arg, int mode, cred_t *pcred, int *rval)
{
    register rd_info_t *prd = INFOPTR(dev);
    register int error = 0;
    register caddr_t kmemadr = 0;   /* kernel side of the copy, if any */
    register int len = 0;           /* bytes to copy; 0 means no copy */
    register int dir = 0; /* copyout */
    int capacity;
    /*
    || Each case only selects what is to be copied and in which
    || direction; the copy itself is done once, after the switch.
    */
    switch(cmd)
    {
    case DIOCGETVH:
        {
            kmemadr = (caddr_t)(&prd->vh);
            len = sizeof(prd->vh);
            DBGMSG1("DIOCGETVH on %d\n",dev);
            break;
        }
    case DIOCREADCAPACITY:
        {
            capacity = prd->size/NBPSCTR;
            kmemadr = (caddr_t)(&capacity);
            len = sizeof(capacity);
            DBGMSG2("DIOCREADCAPACITY on %d = %d\n",
                        dev,capacity);
            break;
        }
    case DIOCSETVH:
        {
            kmemadr = (caddr_t)(&prd->vh);
            len = sizeof(prd->vh);
            dir = 1; /* copyin */
            DBGMSG1("DIOCSETVH on %d done\n",dev);
            break;
        }
    case DIOCFORMAT:
        {
            rd_format(prd);
            DBGMSG1("DIOCFORMAT done on %d!\n",dev);
            break;
        }
    default:
        {
            DBGMSG2("ramdrive invalid ioctl %x on %d\n",cmd,dev);
            error = EINVAL;
        }           
    } /* switch(cmd) */
    /*
    || Perform the copy to or from user space if needed.  copyin() and
    || copyout() report failure with a nonzero value that is not an
    || errno, so a failed copy is translated to EFAULT instead of being
    || handed back to the caller as if it were an error code.
    */
    if ((!error) && (len))
    {
        if (!dir)
        {
            DBGMSG3("ioctl copy kmem %x -> usr %x for %d\n",
                kmemadr, arg, len);
            if (copyout(kmemadr,arg,len))
                error = EFAULT;
        }
        else
        {
            DBGMSG3("ioctl copy usr %x -> kmem %x for %d\n",
                arg, kmemadr, len);
            if (copyin(arg,kmemadr,len))
                error = EFAULT;
        }
        if (error)
        {
            DBGMSG1("error %d on ioctl copy\n",error);
        }
    }
    *rval = error; /* ensure user gets correct code */
    return error;
}
/**************************************************************************
| I/O Operations:
|
| rd_strategy() performs all actual I/O.  Called directly by file systems
| to read and write full I/O page units aligned on I/O page boundaries.
| Called indirectly to implement character I/O in any length and alignment.
|
| rd_read() and rd_write are called by read()/write() to a character
| device. They defer to rd_strategy via uiophysio().  This is consistent
| with the operation of other IRIX disk drivers. 
|
| The strategy code simply does a bcopy. This is highly unrealistic.
| A real device driver would have to deal with efficient sequencing of
| track numbers and with asynchronous interrupts.
|**************************************************************************/
int
rd_strategy(register struct buf *pbuf)
{
    register rd_info_t *prd = INFOPTR(pbuf->b_edev);
    register __psint_t offset = pbuf->b_blkno * NBPSCTR;
    register __psint_t count = pbuf->b_bcount;
    register caddr_t target = (caddr_t)((__psint_t)prd->base)+offset;
    DBGMSG3("rd_strategy: edev %d, flags %x, blkno %x\n",
                        pbuf->b_edev,pbuf->b_flags,pbuf->b_blkno);
    DBGMSG3("           : offset %x, count %x, dmaadr %x\n",
                        offset,count,(caddr_t)pbuf->b_dmaaddr);
    if (!VALIDIO(prd,offset,count))
    {
        DBGMSG0("rejecting strategy with ENOSPC\n");
        pbuf->b_error = ENOSPC;
        iodone(pbuf);
        return 0;
    }
    /*
    || Ensure that pbuf->b_dmaaddr is a valid kernel address.
    || This is never needed when called via uiophysio, only when
    || called from the file system or paging subsystem.  (Goodness!
    || wouldn't it be fun to use a ramdrive for swapping?)
    || NOTE: while a simple bp_mapin() call works, this approach
    || would impose unnecessary overhead in a real driver when
    || the device does not support scatter/gather.
    */
    if (!BP_ISMAPPED(pbuf))
    {
        bp_mapin(pbuf);
        DBGMSG1("        : after bp_mapin dmaadr %x\n", pbuf->b_dmaaddr);
    }
    /*
    || Grab the device semaphore. Note: this ensures consistency
    || between reads and writes, but does not control modifications
    || made through memory-mapped access.
    */
    psema(&prd->queue,PZERO+1 | PCATCH);    
    /*
    || Perform the "read" or "write."
    */  
    if (pbuf->b_flags & B_READ)
    {
        DBGMSG3("        : read %x to %x for %x\n",
            target,pbuf->b_dmaaddr,pbuf->b_bcount);
        bcopy(target,pbuf->b_dmaaddr,pbuf->b_bcount);
    }
    else
    {
        DBGMSG3("        : write %x to %x for %x\n",
            pbuf->b_dmaaddr,target,pbuf->b_bcount);
        bcopy(pbuf->b_dmaaddr,target,pbuf->b_bcount);
    }
    vsema(&prd->queue);
    iodone(pbuf);
    return 0;
}
int
rd_read(dev_t dev, uio_t *puio, cred_t *pcred)
{
    int status;
    /*
    || Character-device read: hand the request to uiophysio(), naming
    || rd_strategy as the routine to call and B_READ as the direction.
    || Whatever uiophysio() returns is the result of the read.
    */
    DBGMSG1("rd_read entered for dev %d\n",dev);
    status = uiophysio(rd_strategy,0,dev,B_READ,puio);
    return status;
}
int
rd_write(dev_t dev, uio_t *puio, cred_t *pcred)
{
    int status;
    /*
    || Character-device write: hand the request to uiophysio(), naming
    || rd_strategy as the routine to call and B_WRITE as the direction.
    || Whatever uiophysio() returns is the result of the write.
    */
    DBGMSG1("rd_write entered for dev %d\n",dev);
    status = uiophysio(rd_strategy,0,dev,B_WRITE,puio);
    return status;
}
int
rd_size(dev_t dev)
{
    /*
    || Report the size of the device in sectors: the byte size recorded
    || in its info structure divided by NBPSCTR.
    */
    register rd_info_t *info = &rd_array[geteminor(dev)];
    DBGMSG1("rd_size entered for dev %d\n",dev);
    return info->size/NBPSCTR;
}
/**************************************************************************
| Memory mapping: rd_map() (one "m") is called to implement an mmap()
| request on a character device.  We permit read and write mappings, which
| means that in a multiprocessor, one CPU could be updating the kernel
| memory that represents the medium while another CPU executes a read()
| on the same memory.
|
| Since a map can persist after the corresponding FD is closed, we
| keep track of mappings separately from opens.
***************************************************************************/
int
rd_map(dev_t dev, vhandl_t *pvh, off_t off, int len, int prot)
{
    register rd_info_t *prd = INFOPTR(dev);
    int error;
    
    DBGMSG3("map request on %d at %x for %x\n",dev,off,len);
    /*
    || Refuse any request that is not wholly inside the ramdisk.  A
    || negative offset or length is tested explicitly: the sum of a
    || negative offset and a small length is still below the device
    || size, yet the mapping would expose kernel memory in front of
    || the ramdisk to the caller.
    */
    if ((off < 0) || (len < 0) || !VALIDIO(prd,off,len))
    {
        DBGMSG0("rejecting map with ENOSPC\n");
        return ENOSPC;
    }
    error = v_mapphys(pvh,prd->base+off,len);
    if (error)
    {
        DBGMSG1("v_mapphys returns %d\n",error);
        return error;
    }
    /*
    || Count the mapping.  rd_open() and rd_unload() consult nmmap to
    || decide whether the device is in use.
    */
    ++prd->nmmap;
    return 0;
}
/*
|| rd_unmap() undoes the bookkeeping of rd_map(): it lowers the count of
|| mappings on the device, never below zero.  Always returns 0.
|| The return type is spelled out; the original relied on implicit int,
|| which is not valid C since C99.
*/
int
rd_unmap(dev_t dev, vhandl_t *pvh)
{
    register rd_info_t *prd = INFOPTR(dev);
    if (prd->nmmap)
    {
        --prd->nmmap;
        DBGMSG2("unmap on %d, map count now %d\n",dev,prd->nmmap);
    }
    else
    {
        /* More unmaps than maps: trace it, leave the count at 0. */
        DBGMSG1("unmap on %d when map count 0 ?!?!?!?\n",dev);
    }
    return 0;
}
/**************************************************************************
| Unload support: rd_unload() is called when ml(1) is asked to unload
| this driver.  We test to make sure that none of our devices that have
| been initialized, are in use.  When any are in use, we return EBUSY
| and so will not be unloaded.
***************************************************************************/
int
rd_unload(void)
{
    int j;
    for (j = 0; j<rd_numdevs; ++j)
    {
        if (( rd_array[j].base )
        &&  ( rd_array[j].copen
            ||rd_array[j].bopen
            ||rd_array[j].nmmap) )
        {
            DBGMSG1("rejecting unload because dev %d busy\n",j);
            return EBUSY;
        }
    }
    DBGMSG0("accepting unload, byeeeee\n");
    return 0;
}


Next | Prev | Up | Top | Contents | Index